{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d001ba4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys  \n",
    "import json\n",
    "import torch \n",
    "import argparse\n",
    "import numpy as np\n",
    "from PIL import Image  \n",
    "from tqdm import tqdm\n",
    "from utils import model_gen, load_jsonl\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer  \n",
    "\n",
    "# POPE question files for the three splits (adversarial / popular / random).\n",
    "# Download them from:\n",
    "# https://github.com/AoiDragon/POPE/tree/e3e39262c85a6a83f26cf5094022a782cb0df58d/output/coco\n",
    "adv_path = 'data/json_files/coco_pope_adversarial.json'\n",
    "pop_path = 'data/json_files/coco_pope_popular.json'\n",
    "rand_path = 'data/json_files/coco_pope_random.json'\n",
    "\n",
    "# Directory containing the COCO 2014 validation images; must be filled in before running.\n",
    "# It is joined with each sample's 'image' field to build the image file path.\n",
    "image_path = ''\n",
    "\n",
    "# Checkpoint identifier handed to both from_pretrained calls below.\n",
    "ckpt_path = 'internlm/internlm-xcomposer2-vl-7b'\n",
    "# trust_remote_code=True lets transformers execute the model/tokenizer code shipped with the checkpoint.\n",
    "tokenizer = AutoTokenizer.from_pretrained(ckpt_path, trust_remote_code=True)\n",
    "# Load onto the GPU, switch to inference mode and cast the weights to fp16.\n",
    "model = AutoModelForCausalLM.from_pretrained(ckpt_path, device_map=\"cuda\", trust_remote_code=True).eval().cuda().half()\n",
    "# NOTE(review): presumably model_gen / the remote model code reads model.tokenizer -- confirm.\n",
    "model.tokenizer = tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57856faa",
   "metadata": {},
   "outputs": [],
   "source": [
    "def eval_func(pred_list, label_list):\n",
    "    \"\"\"Print binary-classification metrics for yes/no predictions and return F1.\n",
    "\n",
    "    Args:\n",
    "        pred_list: Predicted labels, 1 for 'yes' and 0 for 'no'.\n",
    "        label_list: Ground-truth labels in the same encoding, paired with\n",
    "            ``pred_list`` by position (``zip`` stops at the shorter list).\n",
    "\n",
    "    Returns:\n",
    "        The F1 score of the positive ('yes') class. Any ratio whose\n",
    "        denominator is zero (empty input, no predicted positives, no actual\n",
    "        positives) is reported as 0.0 instead of raising ZeroDivisionError.\n",
    "    \"\"\"\n",
    "    pos = 1\n",
    "    neg = 0\n",
    "    # Share of all predictions that are 'yes'; guarded so an empty run does not crash.\n",
    "    n_pred = len(pred_list)\n",
    "    yes_ratio = pred_list.count(pos) / n_pred if n_pred else 0.0\n",
    "\n",
    "    TP, TN, FP, FN = 0, 0, 0, 0\n",
    "    for pred, label in zip(pred_list, label_list):\n",
    "        if pred == pos and label == pos:\n",
    "            TP += 1\n",
    "        elif pred == pos and label == neg:\n",
    "            FP += 1\n",
    "        elif pred == neg and label == neg:\n",
    "            TN += 1\n",
    "        elif pred == neg and label == pos:\n",
    "            FN += 1\n",
    "\n",
    "    print('TP\\tFP\\tTN\\tFN\\t')\n",
    "    print('{}\\t{}\\t{}\\t{}'.format(TP, FP, TN, FN))\n",
    "\n",
    "    # A model that never answers 'yes' (TP + FP == 0) or a split without\n",
    "    # positive labels (TP + FN == 0) must yield 0.0 rather than an exception.\n",
    "    total = TP + TN + FP + FN\n",
    "    precision = TP / (TP + FP) if TP + FP else 0.0\n",
    "    recall = TP / (TP + FN) if TP + FN else 0.0\n",
    "    f1 = 2*precision*recall / (precision + recall) if precision + recall else 0.0\n",
    "    acc = (TP + TN) / total if total else 0.0\n",
    "    print('Accuracy: {}'.format(acc))\n",
    "    print('Precision: {}'.format(precision))\n",
    "    print('Recall: {}'.format(recall))\n",
    "    print('F1 score: {}'.format(f1))\n",
    "    print('Yes ratio: {}'.format(yes_ratio))\n",
    "    print('%.3f, %.3f, %.3f, %.3f, %.3f' % (f1, acc, precision, recall, yes_ratio) )\n",
    "    return f1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a44946f3",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Chat template wrapped around every POPE question; '{}' receives the question text.\n",
    "POPE_PROMPT = '[UNUSED_TOKEN_146]user\\nAnswer the question using a single word or phrase.{}[UNUSED_TOKEN_145]\\n[UNUSED_TOKEN_146]assistant\\n'\n",
    "\n",
    "\n",
    "def collect_pope_predictions(samples):\n",
    "    \"\"\"Query the model on every POPE sample and binarise answers and labels.\n",
    "\n",
    "    Args:\n",
    "        samples: Iterable of records with 'image', 'text' and 'label' keys.\n",
    "\n",
    "    Returns:\n",
    "        (pred_list, label_list): parallel lists of 0/1 ints. Only the exact\n",
    "        string 'no' maps to 0; every other model output or label maps to 1.\n",
    "    \"\"\"\n",
    "    pred_list, label_list = [], []\n",
    "    for q in tqdm(samples):\n",
    "        im_path = os.path.join(image_path, q['image'])\n",
    "        txt = POPE_PROMPT.format(q['text'])\n",
    "        with torch.cuda.amp.autocast():\n",
    "            out = model_gen(model, txt, im_path)\n",
    "        pred_list.append(0 if out == 'no' else 1)\n",
    "        label_list.append(0 if q['label'] == 'no' else 1)\n",
    "    return pred_list, label_list\n",
    "\n",
    "\n",
    "# Adversarial split.\n",
    "samples = load_jsonl(adv_path)\n",
    "pred_list, label_list = collect_pope_predictions(samples)\n",
    "adversarial_f1 = eval_func(pred_list, label_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "292aacfc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Popular split; collect_pope_predictions is defined in the adversarial-split cell above.\n",
    "samples = load_jsonl(pop_path)\n",
    "pred_list, label_list = collect_pope_predictions(samples)\n",
    "popular_f1 = eval_func(pred_list, label_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7520ed20",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random split; collect_pope_predictions is defined in the adversarial-split cell above.\n",
    "samples = load_jsonl(rand_path)\n",
    "pred_list, label_list = collect_pope_predictions(samples)\n",
    "random_f1 = eval_func(pred_list, label_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae33257c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Headline POPE number: unweighted mean of the three per-split F1 scores.\n",
    "pope_mean_f1 = (random_f1 + popular_f1 + adversarial_f1) / 3\n",
    "print(pope_mean_f1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d7426b3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
